# Code purpose: conduct all analyses in main text of 
# "Ethnicity and Policing in the Global South: Descriptive Representation
# and Expectations of Police Bias"
# Input file: LyonMalik_Policing_Data.csv
# Output files: All output sent to replication folder

# Load Data and Libraries -------------------------------------------------

# Load Libraries
pkgs <- c("tidyverse", "rio", "rcompanion", "estimatr", "ggthemes", "texreg", 
          "scales", "cowplot", "countrycode")
invisible(lapply(pkgs, library, character.only = T))


# Load Clean Survey Data 
root <- "ADD YOUR PATH HERE" # set path to replication file folder 
karachi <- import(paste0(root, "/LyonMalik_Policing_Data.csv"))
wvs <- import(paste0(root, "/WVS DATA GOES HERE")) # add WVS file here


# Figure 1: Confidence in Police from WVS ---------------------------------

wvs %>%
  mutate(# get country names
    country = countrycode(V2, origin = "wvs", destination = "country.name"),
    # confidence in the police, coded so 0 = "none at all", 1 = "not very much",
    # 2 = "quite a lot", and 3 = "a great deal"
    conf_pol = case_when(
      V113 == 4 ~ 0,
      V113 == 3 ~ 1, 
      V113 == 2 ~ 2, 
      V113 == 1 ~ 3
      )) %>%
  # get country level means
  group_by(country) %>%
  summarise(mean_conf_pol = mean(conf_pol, na.rm = T)) %>%
  mutate(pakistan = ifelse(country == "Pakistan", 1, 0)) %>%
  # plot
  ggplot(aes(x = mean_conf_pol, y = reorder(country, mean_conf_pol),
             fill = factor(pakistan))) + 
  geom_bar(stat = "identity") + 
  coord_cartesian(xlim = c(0, 3)) +
  scale_fill_discrete(type = c("darkgray", "black"), guide = F) +
  scale_x_continuous(labels=c("0" = "None at all",
                              "1" = "Not very much",
                              "2" = "Quite a lot",
                              "3" = "A great deal")) +
  labs(x = "\nLevel of Confidence",
       y = "") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle = 30, hjust = 1),
        axis.text=element_text(size=9),
        axis.title=element_text(size=14, face="bold"),
        legend.title = element_blank())

ggsave(paste0(root, "/figure1.pdf"), plot = last_plot(),
       height = 8, width = 5, units = "in")  


# Figure 2: Respondent Perceptions of Coethnics in Force ------------------

# get mean and 95% CI for estimate of coethnic presence, by ethnicity
groupwiseMean(n_coethnic ~ ethnicity, data = karachi, conf = 0.95) %>%
  # plot mean and 95% CI by group
  ggplot() + 
  geom_bar(aes(x = reorder(ethnicity, Mean), y = Mean),
           stat = 'identity', fill = 'darkgrey') + 
  geom_errorbar(aes(x = reorder(ethnicity, Mean), 
                    ymin = Trad.lower, ymax = Trad.upper),
                width = 0.25) +
  labs(x = "\n Ethnic Groups in Survey Sample",
       y = "Percent of Local Police (%) \n") +
  theme_bw() + 
  theme(axis.text.y = element_text(angle = 30, vjust = 0.5),
        axis.text=element_text(size=12),
        axis.title=element_text(size=14,face="bold"))

ggsave(paste0(root, "/figure2.pdf"), plot = last_plot(),
       height = 4, width = 6, units = "in")


# Table 1: Demographic Determinants of Trust in Police -----------------------

# subset data to experimental sample of muhajir, pashtun, and sindhi respondents
exp_sample <- subset(karachi, ethnicity == "Muhajir" | ethnicity == "Pashtun" | ethnicity == "Sindhi")

# number corresponds to model number in table
dem_reg1 <- lm_robust(police_trust ~ under30 + non_sindhi + education + years_lived + total_crimes, 
                      data = exp_sample, clusters = psu)
dem_reg2 <- lm_robust(police_trust ~ under30 + non_sindhi + education + years_lived + total_crimes + male, 
                      data = exp_sample, clusters = psu)
dem_reg3 <- lm_robust(police_trust ~ under30 + non_sindhi + education + years_lived + total_crimes, 
                      # model 3 run on 30 and under subset
                      data = subset(exp_sample, under30 == 1), 
                      clusters = psu)
dem_reg4 <- lm_robust(police_trust ~ under30 + non_sindhi + education + years_lived + total_crimes + male, 
                      # model 4 run on 30 and under subset
                      data = subset(exp_sample, under30 == 1), 
                      clusters = psu)
dem_reg5 <- lm_robust(police_trust ~ under30 + non_sindhi + education + years_lived + total_crimes, 
                      # model 5 run on over 30 subset
                      data = subset(exp_sample, under30 == 0), 
                      clusters = psu)
dem_reg6 <- lm_robust(police_trust ~ under30 + non_sindhi + education + years_lived + total_crimes + male, 
                      # model 6 run on over 30 subset
                      data = subset(exp_sample, under30 == 0), 
                      clusters = psu)

# Output regression results
texreg(list(dem_reg1, dem_reg2, dem_reg3, dem_reg4, dem_reg5, dem_reg6),
       include.ci = F,
       omit.coef = c("education|years_lived|total_crimes"),
       stars = c(0.01, 0.05, 0.1),
       digits = 2,
       include.rmse = F,
       include.fstatistic = F,
       caption = "Demographic Determinants of Trust in Police")


# Construct composite measure using PCA ---------------------------------------

# Define three outcome variables that go into PCA
outcome_varlist <- c('police_fair', 'police_listen', 'police_preferential')

# Subset dataset to non-missing on outcomes for PCA
nomiss <- subset(exp_sample, complete.cases(exp_sample[outcome_varlist]) == T)

# Run PCA
pca_out <- prcomp(nomiss[,outcome_varlist])

# Create composite from first principal component [explains 88% of variance]
# And rescale to same 1 - 5 range as other outcomes
nomiss$composite <- rescale(pca_out$x[,1], to = c(1, 5)) 

# merge composite back in with experimental sample
exp_sample <- left_join(exp_sample, nomiss[, c("resp_id", "composite")],
                          by = "resp_id")

# Figure 3: Outcomes by Treatment Condition -------------------------------

# Get mean and 95% CI by treatment status for composite measure
composite <- exp_sample %>%
  groupwiseMean(composite ~ coeth_police, data = ., conf = 0.95, na.rm = T) %>%
  mutate(outcome = "composite")

# Get mean and 95% CI by treatment status for police listen measure
listen <- exp_sample %>%
  groupwiseMean(police_listen ~ coeth_police, data = ., conf = 0.95, na.rm = T) %>%
  mutate(outcome = "listen")

# Get mean and 95% CI by treatment status for police fair measure
fair <- exp_sample %>%
  groupwiseMean(police_fair ~ coeth_police, data = ., conf = 0.95, na.rm = T) %>%
  mutate(outcome = "fair")

# Get mean and 95% CI by treatment status for police preferential measure
preferential <- exp_sample %>%
  groupwiseMean(police_preferential ~ coeth_police, data = ., conf = 0.95, na.rm = T) %>%
  mutate(outcome = "preferential")

# Combine means and 95% CIs across 4 measures and plot
rbind(composite, listen, fair, preferential) %>%
  ggplot(aes(x = outcome, group = coeth_police, color = factor(coeth_police))) + 
  geom_errorbar(aes(ymin = Trad.lower, ymax = Trad.upper), width = 0, 
                position = position_dodge(width = .5), size = 1.5) + 
  geom_point(aes(y = Mean), position = position_dodge(width = .5), size = 2,
             shape = 21, stroke = 1.5, fill = "white") + 
  scale_x_discrete(limits = c("composite", "listen", "fair", "preferential"),
                   labels=c("composite" = "Composite \n Measure",
                            "listen" = "Police \n Listen",
                            "fair" = "Police \n Fair",
                            "preferential" = "Police \n Preferential")) +
  scale_color_manual(values = c("darkgray", "black"), 
                     labels = c("Non-coethnic \nPolice Treatment", "Coethnic \nPolice Treatment")) + 
  labs(x = "\n Outcome",
       y = "Expectations of Police \nBehavior \n") +
  theme_bw() + 
  theme(axis.text.y = element_text(angle = 30, vjust = 0.5),
        axis.text=element_text(size=12),
        axis.title=element_text(size=14,face="bold"),
        legend.position="bottom",
        legend.title = element_blank(),
        legend.text=element_text(size=12),
        legend.key.width = unit(2, "line")) 

ggsave(paste0(root, "/figure3.pdf"), plot = last_plot(),
       height = 4, width = 6, units = "in")

# Table 2: Effect of Coethnicity on Perceptions ---------------------------

# Regress each outcome on randomized exposure to coethnic police profile
reg_composite <- lm_robust(composite ~ coeth_police, data = exp_sample)
reg_listen <- lm_robust(police_listen ~ coeth_police, data = exp_sample)
reg_fair <- lm_robust(police_fair ~ coeth_police, data = exp_sample)
reg_preferential <- lm_robust(police_preferential ~ coeth_police, data = exp_sample)

# Output regression results
texreg(list(reg_composite, reg_listen, reg_fair, reg_preferential),
       include.ci = F,
       stars = c(0.01, 0.05, 0.1),
       digits = 2,
       include.rmse = F,
       include.fstatistic = F,
       caption = "The Effect of Coethnicity on Perceptions of Interactions with the Police")


# Table 3: Heterogeneous Effects by Descriptive Representation ------------

# Regress each outcome on interaction between treatment and descriptive representation
composite_inter <- lm_robust(composite ~ coeth_police*n_coethnic, data = exp_sample)
listen_inter <- lm_robust(police_listen ~ coeth_police*n_coethnic, data = exp_sample)
fair_inter <- lm_robust(police_fair ~ coeth_police*n_coethnic, data = exp_sample)
preferential_inter <- lm_robust(police_preferential ~ coeth_police*n_coethnic, data = exp_sample)

# Output regression results
texreg(list(composite_inter, listen_inter, fair_inter, preferential_inter),
       include.ci = F,
       stars = c(0.01, 0.05, 0.1),
       digits = 3,
       include.rmse = F,
       include.fstatistic = F,
       caption = "Heterogeneous Treatment Effects by Perceptions of Descriptive Representation")


# Figure 4: Composite Measure - Interaction Model -------------------------

# Make synthetic data frame for model predictions
pred_df <- tibble(
  coeth_police = c(rep(0,101), rep(1,101)),
  n_coethnic = c(rep(seq(0,100,1),2))
)

# Get predictions for composite measure, using interaction model
pred_out <- predict(composite_inter, pred_df, interval = "confidence", level = 0.95) %>%
  cbind(pred_df)

# Plot predictions and CI from interaction model for composite measure
interaction_plot <- ggplot(pred_out, aes(x = n_coethnic, y = fit.fit, 
                                         color = as.factor(coeth_police),
                                         lty = as.factor(coeth_police))) +
  geom_line(size = 1.25) +
  geom_ribbon(aes(ymin = fit.lwr, ymax = fit.upr), alpha = 0.25) +
  scale_color_manual(values= c("darkgray", "black"),
                     labels = c("Non-coethnic Police Treatment", "Coethnic Police Treatment"),
                     name = '') +
  scale_linetype_manual(values= c("solid", "dashed"),
                        labels = c("Non-coethnic Police Treatment", "Coethnic Police Treatment"),
                        name = '') +
  labs(x = "\nNumber Coethnic Police out of 100",
       y = "Predicted Value from \nInteraction Model") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) + 
  theme(axis.text.y = element_text(angle = 30, vjust = 0.5),
        axis.text=element_text(size=12),
        axis.title=element_text(size=14,face="bold"))

# Create histogram to add to x-axis
xhist <- axis_canvas(interaction_plot, axis = "x") +
  geom_histogram(data = subset(exp_sample, !is.na(composite)), 
                 aes(x = n_coethnic, fill = as.factor(coeth_police)),
                 bins = 25) +
  scale_fill_manual(values = c("darkgray", "black")) 

# Combine interaction plot with x-axis histogram
combined_plot <- insert_xaxis_grob(interaction_plot, xhist, position = "bottom")

# Save the resulting combined plot
ggsave(paste0(root, "/figure4.pdf"), plot = combined_plot, 
       width = 7, height = 4, units = "in")

